# groupByKey demo: on a key-value RDD, group values by key; each result is a (key, values) pair whose values are converted to a list for printing
#coding:utf8
from pyspark import SparkContext,SparkConf

if __name__ == '__main__':
    # Demo entry point: group a small key-value RDD by key and print the result.
    # "local[*]" runs Spark in-process rather than against a cluster.
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        # Sample key-value data: each of the keys "a", "b", "c" appears twice.
        rdd = sc.parallelize(
            [("a", 1), ("b", 2), ("c", 3), ("a", 2), ("b", 3), ("c", 4)]
        )

        # groupByKey pairs each key with an iterable of its values; mapValues(list)
        # materializes that iterable as a plain list so it prints readably,
        # leaving the key untouched.
        grouped = rdd.groupByKey().mapValues(list)
        print(grouped.collect())
    finally:
        # Always release the Spark backend, even if the job above fails.
        sc.stop()